From 751b9ebab8c6bea9cf125d6c122613a5a8c89df4 Mon Sep 17 00:00:00 2001
From: Brian Wolff
Date: Fri, 6 Dec 2013 15:34:49 -0400
Subject: [PATCH] Add method to get entire text layer.

This could be useful for search

Change-Id: I22f5fc47aef3cf362cdf630980deea48fe531d45
---
 includes/media/MediaHandler.php | 27 ++++++++++++++++++++++++++-
 1 file changed, 26 insertions(+), 1 deletion(-)

diff --git a/includes/media/MediaHandler.php b/includes/media/MediaHandler.php
index 1dc74ce9fe..e9d0039f06 100644
--- a/includes/media/MediaHandler.php
+++ b/includes/media/MediaHandler.php
@@ -426,12 +426,37 @@ abstract class MediaHandler {
 	 * Currently overloaded by PDF and DjVu handlers
 	 * @param File $image
 	 * @param int $page Page number to get information for
-	 * @return bool|string Page text or false when no text found.
+	 * @return bool|string Page text or false when no text found or if
+	 *   unsupported.
 	 */
 	function getPageText( $image, $page ) {
 		return false;
 	}
 
+	/**
+	 * Get the text of the entire document.
+	 * @param File $file
+	 * @return bool|string The text of the document or false if unsupported.
+	 */
+	public function getEntireText( File $file ) {
+		$numPages = $file->pageCount();
+		if ( !$numPages ) {
+			// Not a multipage document
+			return $this->getPageText( $file, 1 );
+		}
+		$document = '';
+		for( $i = 1; $i <= $numPages; $i++ ) {
+			$curPage = $this->getPageText( $file, $i );
+			if ( is_string( $curPage ) ) {
+				$document .= $curPage . "\n";
+			}
+		}
+		if ( $document !== '' ) {
+			return $document;
+		}
+		return false;
+	}
+
 	/**
 	 * Get an array structure that looks like this:
 	 *
-- 
2.20.1